import urllib.request
import re


def get_html(url):
    """Fetch ``url`` and return the response body decoded as UTF-8.

    The request is sent with a desktop-browser ``User-Agent`` header.

    Args:
        url: Address of the page to download.

    Returns:
        The full response body as a ``str``.

    Raises:
        urllib.error.URLError: If ``urlopen`` cannot complete the request.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
    }
    req = urllib.request.Request(url, headers=headers)
    # Close the response as soon as the body has been read instead of
    # leaving the connection open until the object is garbage-collected.
    with urllib.request.urlopen(req) as res:
        return res.read().decode('utf-8')


def get_content(url):
    """Crawl a listing page and append each chapter's text to a ``.txt`` file.

    Three levels of pages are downloaded with ``get_html``:

    1. ``url`` itself; every ``<h3><a href="...">`` target on it is collected.
    2. For each target, the page ``<site><target>xiaoshuo.html``; every
       ``<li><a href="...">`` target on it is collected.
    3. For each of those, the page ``<site><target><chapter>``; the contents
       of every ``<div class="articlebody" id="content">`` are extracted.

    Each extracted body has its ``<p>`` / ``</p>`` tags removed and is
    appended to ``<name>.txt`` in the current working directory, where
    ``<name>`` is the concatenation of all ``<a href="./">`` link texts found
    on the chapter page.

    Args:
        url: Address of the top-level listing page.

    Returns:
        None. The results are written to files.
    """
    site = 'http://www.quanwenyuedu.io'

    # Compile every pattern once per call rather than once per page.
    book_link_re = re.compile(r'<h3><a href="(.*?)">', re.S)
    chapter_link_re = re.compile(r'<li><a href="(.*?)">', re.S)
    body_re = re.compile(
        r'<div class="articlebody" id="content">(.*?)</div>', re.S)
    # NOTE: the '.' in this pattern is unescaped, so it matches any character.
    title_re = re.compile(r'<a href="./">(.*?)</a>', re.S)

    for book_path in book_link_re.findall(get_html(url)):
        index_html = get_html(site + book_path + 'xiaoshuo.html')
        for chapter_path in chapter_link_re.findall(index_html):
            chapter_html = get_html(site + book_path + chapter_path)
            bodies = body_re.findall(chapter_html)
            title = ''.join(title_re.findall(chapter_html))
            # The pages are decoded as UTF-8, so write them back as UTF-8
            # instead of relying on the platform's default encoding.
            with open(title + '.txt', 'a', encoding='utf-8') as f:
                for body in bodies:
                    f.write(body.replace('<p>', '').replace('</p>', ''))
                    

# Script entry: crawl the listing page below and save everything it links to.
url = 'http://www.quanwenyuedu.io/c/2.html'
get_content(url=url)
